# Select the features whose values differ between the two sample groups
# (one Wilcoxon rank-sum test per feature) and write them to a CSV file.

# Analysis settings ----
input_file <- "tfidf_all_SF3B1.csv"
output_file <- "select_features_SF3B1.csv"
feature_cols <- 2:102 # positions of the DL-1 to DL-101 columns
p_threshold <- 0.01
# NOTE(review): this step was originally labelled "FDR correction", yet the
# method in use is "none", i.e. the raw p-values are thresholded as-is.
# Set this to "BH" to actually apply Benjamini-Hochberg FDR control; it is
# left at "none" here so that existing results do not silently change.
p_adjust_method <- "none"

# Read the data; the first CSV column supplies the row names and the
# column names are kept exactly as written in the file.
data <- read.csv(input_file, row.names = 1, check.names = FALSE)

# Extract the feature columns (rows = samples, columns = features).
feature_vectors <- data[, feature_cols, drop = FALSE]

# Extract the grouping variable. `[[` is used instead of `$` to avoid
# partial name matching, and a missing column fails here with a clear message
# rather than later inside wilcox.test().
if (!"group" %in% names(data)) {
  stop("Column 'group' not found in ", input_file, call. = FALSE)
}
location <- data[["group"]]

# Run a Wilcoxon test of every feature against the grouping variable.
# Iterating over the columns themselves removes the hard-coded feature count.
p_values <- vapply(
  feature_vectors,
  function(feature) wilcox.test(feature ~ location)$p.value,
  numeric(1)
)

# Adjust the p-values for multiple testing (see p_adjust_method above).
p_adjusted <- p.adjust(p_values, method = p_adjust_method)

# Keep the significant features. which() discards NA/NaN p-values (e.g. from
# constant features). Columns are selected directly, so the result no longer
# depends on the number of samples (previously a fixed column index, 71, was
# dropped after a transpose, which is only correct for exactly 70 samples).
significant <- which(p_adjusted < p_threshold)
select_features <- feature_vectors[, significant, drop = FALSE]

# Write the result (rows = samples, columns = selected features).
write.csv(select_features, output_file)
